# Keep things nice and tidy, all libraries go here
library(readxl)
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.5
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(knitr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(svglite)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# Load the review spreadsheet; `skip = 1` skips the first row of the sheet
# before anything (including column names) is read.
data <- read_excel(path = "data.xlsx", skip = 1)
## New names:
## * `` -> ...35
## * `` -> ...71
# Keep only the rows whose `Exclude` cell is empty.
data <- filter(data, is.na(Exclude))
# Bar chart of the number of publications per year, with the count printed
# inside each bar; the last plot is then written to an EPS file.
ggplot(data, aes(x = as.factor(Year))) +
  geom_bar() +
  geom_text(
    stat = "count",
    aes(label = ..count..),
    vjust = 2,
    color = "white",
    size = 2.5
  ) +
  labs(x = "Year", y = "Number of publications") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave("yearly_distribution.eps")
## Saving 7 x 5 in image
# Treat the "?" placeholder in the affiliation columns as missing.
data <- data %>%
  mutate(
    Academia = replace(Academia, Academia == "?", NA),
    Industry = replace(Industry, Industry == "?", NA)
  )

# Derive the publication type from the two affiliation columns:
# neither set -> "None", exactly one set to "1.0" -> that one,
# anything else -> "Both".
data <- data %>%
  mutate(
    Type = case_when(
      is.na(Academia) & is.na(Industry) ~ "None",
      Academia == "1.0" & is.na(Industry) ~ "Academia",
      Industry == "1.0" & is.na(Academia) ~ "Industry",
      TRUE ~ "Both"
    )
  )
# Bar chart of publications per type, bars ordered by decreasing frequency,
# with the count printed inside each bar; saved as EPS.
data %>%
  mutate(Type = fct_infreq(Type, ordered = TRUE)) %>%
  ggplot(aes(x = Type)) +
  geom_bar(width = 0.5) +
  geom_text(
    stat = "count",
    aes(label = ..count..),
    vjust = 3,
    color = "white",
    size = 4
  ) +
  labs(x = "Type of publication", y = "Number of publications") +
  theme_bw()
ggsave("academia_industry_distribution.eps")
## Saving 7 x 5 in image
A publication can be in more than one category at the same time.
# Total number of publications per SWEBoK area, shown as a bar chart sorted
# from the most to the least frequent area; saved as EPS.
data %>%
  select(7:21) %>% # columns 7-21 hold the SWEBoK area flags
  mutate_all(replace_na, 0) %>% # empty cell = publication not in that area
  summarise_all(sum) %>%
  gather(key = "SWEBOKArea", value = "publications", 1:15) %>%
  arrange(desc(publications)) %>%
  # freeze the sorted order as factor levels so ggplot keeps it on the axis
  mutate(SWEBOKArea = factor(SWEBOKArea, levels = SWEBOKArea)) %>%
  ggplot(aes(x = SWEBOKArea, y = publications)) +
  geom_col() +
  geom_text(
    aes(label = publications),
    vjust = -0.3,
    color = "black",
    size = 4
  ) +
  labs(x = "SWEBoK Area", y = "Number of publications") +
  theme_bw()
ggsave("swebok_distribution.eps")
## Saving 7 x 5 in image
# Area-by-area cross-product (t(X) %*% X) of the SWEBoK flag columns.
# Assuming the cells hold 0/1 flags, entry [i, j] is the number of
# publications tagged with both area i and area j.
swebokareas <- crossprod(
  as.matrix(
    data %>%
      select(7:21) %>% # columns 7-21 hold the SWEBoK area flags
      mutate_all(replace_na, 0)
  )
)

# Render the matrix as a table.
kable(swebokareas)
| SR | SD | SC | ST | SM | SCM | SEM | SEP | SEMM | SQ | SEPP | SEE | CF | MF | EF | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| SR | 49 | 18 | 5 | 2 | 4 | 0 | 7 | 2 | 4 | 0 | 7 | 0 | 0 | 0 | 1 |
| SD | 18 | 66 | 17 | 3 | 4 | 0 | 6 | 2 | 6 | 1 | 6 | 0 | 0 | 0 | 1 |
| SC | 5 | 17 | 77 | 5 | 22 | 1 | 3 | 2 | 2 | 0 | 3 | 0 | 0 | 0 | 0 |
| ST | 2 | 3 | 5 | 12 | 4 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| SM | 4 | 4 | 22 | 4 | 46 | 1 | 2 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
| SCM | 0 | 0 | 1 | 0 | 1 | 2 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| SEM | 7 | 6 | 3 | 1 | 2 | 0 | 26 | 3 | 1 | 0 | 7 | 3 | 0 | 0 | 1 |
| SEP | 2 | 2 | 2 | 0 | 1 | 1 | 3 | 10 | 0 | 0 | 2 | 1 | 0 | 0 | 0 |
| SEMM | 4 | 6 | 2 | 0 | 0 | 0 | 1 | 0 | 8 | 0 | 1 | 0 | 0 | 0 | 0 |
| SQ | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 0 |
| SEPP | 7 | 6 | 3 | 0 | 1 | 0 | 7 | 2 | 1 | 0 | 18 | 3 | 0 | 0 | 1 |
| SEE | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | 0 | 0 | 3 | 5 | 0 | 0 | 0 |
| CF | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| MF | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| EF | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
# Interactive heatmap of the SWEBoK co-occurrence matrix.
# The axis labels are taken from the matrix's own dimnames so they always
# line up with its rows and columns. A hand-written label vector is easy to
# get out of sync with the matrix (the previous one listed 14 areas, omitting
# "SCM", for a 15 x 15 matrix).
plot_ly(
  x = colnames(swebokareas),
  y = rownames(swebokareas),
  z = swebokareas,
  type = "heatmap"
)
# Long table with one row per (SWEBOK area, taxonomy column) pair and the
# summed taxonomy flag over the publications tagged with that area.
x <- data %>%
  select(
    7:21, # SWEBoK area flags
    matches("Attention|Memory|Cognitive load|CL$|Problem solving|Reasoning|Decision making|Errors| biases$")
  ) %>%
  mutate_all(replace_na, 0) %>%
  # `Problem solving` is not numeric after import; presumably this coercion
  # is the source of the "NAs introduced by coercion" warning.
  mutate(`Problem solving`, `Problem solving` = as.numeric(`Problem solving`)) %>%
  # one row per publication and SWEBOK area
  gather(key = "SWEBOK", value = pubs, 1:15) %>%
  # keep only the areas a publication is actually tagged with
  filter(pubs > 0) %>%
  group_by(SWEBOK) %>%
  # per area, add up every remaining column
  summarise_all(sum) %>%
  # drop pubs so the name can be reused later
  select(-pubs) %>%
  # columns 2-17 are the taxonomy columns; stack them into Taxonomy / count
  gather(key = "Taxonomy", value = "count", 2:17) %>%
  # text label for the plot; a count of 0 becomes an empty label
  mutate(label = str_replace(as.character(count), "^0", ""))
## Warning: NAs introduced by coercion
# Bubble plot of SWEBOK area vs. taxonomy area, bubble size = count.
# Sort by taxonomy name and freeze that order as the factor levels.
x <- arrange(x, Taxonomy)
xf <- x$Taxonomy
xfu <- unique(xf)
x$Taxonomy <- factor(xf, levels = xfu)
p <- ggplot(x)
# theme_bw() is a complete theme: when added after theme(), it replaces every
# setting made there. It is therefore applied first, and the axis/grid
# customisations are layered on top of it.
p +
  geom_point(
    aes(x = fct_infreq(SWEBOK), y = fct_rev(Taxonomy), size = count),
    shape = 21,
    fill = "white",
    alpha = 0.60
  ) +
  geom_text(
    aes(x = fct_infreq(SWEBOK), y = fct_rev(Taxonomy), label = label),
    size = 2
  ) +
  labs(x = "SWEBOK Area", y = "Taxonomy Area") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1.1, size = 9, colour = "black"),
    axis.text.y = element_text(size = 8, colour = "black"),
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 10, colour = "black", vjust = 0.12),
    panel.grid.major = element_line(linetype = "dashed", size = 0.1, color = "black")
  )
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text).
# Write the most recently displayed ggplot to a PDF file.
ggsave(filename = "swebok_taxonomy_bubble.pdf")
## Saving 7 x 5 in image
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text).
# Derive three 0/1 research-method flags per publication:
#   Quantitative - any of the quantitative measure columns equals 1
#   Qualitative  - any of the qualitative method columns equals 1
#   Both         - both of the above
# case_when() yields NA when no condition holds; replace_na() turns it into 0.
data <- data %>%
  mutate(
    Quantitative = replace_na(
      case_when(
        `Quantit. measures` == 1 |
          `Task performance` == 1 |
          `Physiological meas.` == 1 |
          `Subjective ratings` == 1 |
          `Behavioral meas.` == 1 ~ 1
      ),
      0
    ),
    Qualitative = replace_na(
      case_when(
        Fieldwork == 1 |
          Interview == 1 |
          `Qualit. measures` == 1 |
          `Task-based` == 1 |
          `Open observation` == 1 ~ 1
      ),
      0
    ),
    Both = if_else(Qualitative == 1 & Quantitative == 1, 1, 0)
  )
Number of publications per year according to SWEBOK areas
# Stacked bar chart of publications per year, one fill colour per SWEBOK
# area; saved as PDF.
data %>%
  filter(is.na(Exclude)) %>%
  select(c(Year, SR:EF)) %>%
  gather("SWEBOK", "publications", 2:16) %>%
  mutate_all(replace_na, 0) %>%
  group_by(Year, SWEBOK) %>%
  summarise(total = sum(publications)) %>%
  ggplot(aes(x = as.factor(Year), fill = SWEBOK, y = total)) +
  geom_bar(stat = "sum") +
  labs(x = "Year", y = "Publications") +
  scale_fill_discrete(name = "SWEBOK Areas") +
  guides(size = FALSE) + # hide the size legend that stat = "sum" adds
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6))
ggsave("years_swebok.pdf")
## Saving 7 x 5 in image
# Add a row (all other columns NA) for every year in 1973-2016 that has no
# publication, then label each publication's research method.
# NOTE(review): a row with Qualitative == 0 and Quantitative == 0 falls
# through to "Quantitative" - confirm that no such publication exists.
data <- data %>%
  complete(Year = seq(1973, 2016)) %>%
  mutate(
    research_method = if_else(
      Both == 1,
      "Mixed",
      if_else(Qualitative == 1, "Qualitative", "Quantitative")
    )
  )

# Stacked bar chart of publications per year by research method. The rows
# added for empty years have an NA method and are drawn transparent.
data %>%
  ggplot(aes(x = as.factor(Year), fill = research_method)) +
  geom_bar() +
  scale_fill_discrete(
    name = "Research method",
    labels = c("Mixed", "Qualitative", "Quantitative", ""),
    na.value = "transparent"
  ) +
  labs(x = "Year", y = "Publications") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 5))
ggsave("years_researchmethods.pdf")
## Saving 7 x 5 in image
# Publications per SWEBOK area, split by research method (long format).
# Rows without a research method are dropped explicitly. The previous version
# turned the NA method into the string "0" via replace_na() and then removed
# that group by position with slice(2:4), which silently drops a real group
# whenever no NA rows exist and fails under type-strict replace_na().
data.swebok.researchmethod <- data %>%
  select(SR:EF, research_method) %>%
  filter(!is.na(research_method)) %>%
  # only the numeric area flags get their NAs replaced
  mutate_at(vars(SR:EF), replace_na, 0) %>%
  group_by(research_method) %>%
  summarise_at(vars(SR:EF), sum) %>%
  gather("SWEBOK", "Publications", 2:16)

# Horizontal stacked bars, areas ordered by their total number of
# publications; saved as PDF.
data.swebok.researchmethod %>%
  ggplot(
    aes(
      x = reorder(SWEBOK, Publications, sum),
      y = Publications,
      fill = research_method
    )
  ) +
  geom_col() +
  coord_flip() +
  xlab("SWEBOK areas") +
  scale_fill_discrete(name = "Research method")
ggsave("SWBOK_researchmethods.pdf")
## Saving 7 x 5 in image
# Bubble chart of taxonomy column vs. method/measure column: one point per
# pair, sized by the number of publications with a non-empty cell in both.
data %>%
  filter(!is.na(Identifier)) %>%
  select(
    Identifier,
    Attention:`Extrinsic CL`,
    Perception:`Social Cognition`,
    Fieldwork:`Behavioral meas.`
  ) %>%
  # stack the taxonomy columns and keep only the ones that are filled in
  gather(Taxonomy, value, Attention:`Social Cognition`) %>%
  filter(!is.na(value)) %>%
  select(-value) %>%
  # same for the method/measure columns
  gather(Method, value, Fieldwork:`Behavioral meas.`) %>%
  filter(!is.na(value)) %>%
  arrange(Identifier) %>%
  select(-Identifier, -value) %>%
  group_by(Taxonomy, Method) %>%
  tally(name = "Amount") %>%
  ggplot(aes(x = Method, y = Taxonomy, fill = Amount)) +
  geom_point(aes(size = Amount), alpha = 0.5) +
  theme(
    # "none" is the documented way to hide the legend; an empty string is
    # not a valid position and is rejected by newer ggplot2 releases
    legend.position = "none",
    axis.text.x = element_text(angle = 30, hjust = 1, size = 8)
  )
ggsave("taxonomy_methods.pdf")
## Saving 7 x 5 in image
# Heatmap of SWEBOK area vs. taxonomy column co-occurrences; saved as PDF.
data %>%
  select(
    SR:EF,
    Attention:`Extrinsic CL`,
    Perception:`Social Cognition`
  ) %>%
  mutate_all(replace_na, 0) %>%
  gather(SWEBOK, value, SR:EF) %>%
  gather(Taxonomy, value2, Attention:`Social Cognition`) %>%
  # one row per (area, taxonomy, value, value2) combination with its size n
  count(SWEBOK, Taxonomy, value, value2) %>%
  # only the combination where both flags equal 1 contributes to the colour
  mutate(freq = ifelse(value == 1 & value2 == 1, n, 0)) %>%
  ggplot(aes(fct_rev(SWEBOK), fct_rev(Taxonomy), fill = freq)) +
  geom_tile() +
  scale_fill_continuous(low = "#fff9f7", high = "red") +
  labs(x = "SWEBOK", y = "Topic") +
  guides(fill = guide_legend(title = "")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 30, hjust = 1, size = 8))
ggsave("taxomony_swebok_cooccurences.pdf")
## Saving 7 x 5 in image
# Heatmap of method/measure column vs. taxonomy column co-occurrences;
# saved as PDF.
data %>%
  select(
    Attention:`Extrinsic CL`,
    Perception:`Social Cognition`,
    Fieldwork:`Behavioral meas.`
  ) %>%
  mutate_all(replace_na, 0) %>%
  gather(Taxonomy, value, Attention:`Social Cognition`) %>%
  gather(Method, value2, Fieldwork:`Behavioral meas.`) %>%
  # one row per (taxonomy, method, value, value2) combination with its size n
  count(Taxonomy, Method, value, value2) %>%
  # only the combination where both flags equal 1 contributes to the colour
  mutate(freq = ifelse(value == 1 & value2 == 1, n, 0)) %>%
  ggplot(aes(Method, fct_rev(Taxonomy), fill = freq)) +
  geom_tile() +
  labs(x = "Measure", y = "Topic") +
  guides(fill = guide_legend(title = "")) +
  scale_fill_continuous(low = "#fff9f7", high = "darkgreen") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 30, hjust = 1, size = 8))
ggsave("taxonomy_method_cooccurences.pdf")
## Saving 7 x 5 in image
# Stacked bar chart of publications per year, one fill colour per taxonomy
# column. The plot is only displayed, not saved.
data %>%
  select(Year, Attention:`Extrinsic CL`, Perception:`Social Cognition`) %>%
  gather("Taxonomy", "publications", Attention:`Social Cognition`) %>%
  mutate_all(replace_na, 0) %>%
  # the stacked value column is not numeric, so coerce it before summing
  mutate(publications = as.integer(publications)) %>%
  group_by(Year, Taxonomy) %>%
  summarise(total = sum(publications)) %>%
  ggplot(aes(as.factor(Year), total, fill = Taxonomy)) +
  geom_bar(stat = "sum") +
  labs(x = "Year", y = "Publications") +
  scale_fill_discrete(name = "Taxonomy Areas") +
  guides(size = FALSE) + # hide the size legend that stat = "sum" adds
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6))
## Warning: NAs introduced by coercion
## Warning: Removed 1 rows containing non-finite values (stat_sum).
# Long table with one row per (publication year, taxonomy column) whose flag
# is greater than zero.
df.taxonomy <- data %>%
  select(
    Year,
    Attention:`Extrinsic CL`,
    Perception:`Social Cognition`
  ) %>%
  gather("Taxonomy", "publications", Attention:`Social Cognition`) %>%
  mutate_all(replace_na, 0) %>%
  # the stacked value column comes out as character, so coerce it explicitly
  mutate(publications = as.integer(publications)) %>%
  filter(publications > 0)
## Warning: NAs introduced by coercion
# Separate table holding, for every year, each taxonomy column's share of
# that year's rows in df.taxonomy, formatted as a percentage label.
# The result stays grouped by Year, so sum(n) is the per-year total.
data.percentage <- df.taxonomy %>%
  group_by(Year) %>%
  count(Taxonomy) %>%
  mutate(ratio = scales::percent(n / sum(n)))

# 100 % stacked bar chart per year with the percentage printed in the middle
# of each segment.
df.taxonomy %>%
  ggplot(aes(x = as.factor(Year), fill = as.factor(Taxonomy))) +
  geom_bar(position = "fill") +
  geom_text(
    data = data.percentage,
    aes(y = n, label = ratio),
    position = position_fill(vjust = 0.5),
    colour = "white",
    size = 1.3
  ) +
  labs(x = "Year", y = "Publications %") +
  scale_fill_discrete(name = "Topic") +
  guides(size = FALSE) +
  scale_y_continuous(labels = percent) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    legend.key.size = unit(.2, "cm"),
    legend.key.width = unit(0.2, "cm"),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 6)
  )

# ggsave() takes plain numbers for width/height plus a separate `units`
# argument; grid unit objects are not part of its documented interface.
ggsave("taxonomy_years.pdf", width = 9, height = 6.5, units = "in")